All PBMC datasets were processed with Cell Ranger (Pipeline-Version: cellranger-7.1.0).
# Import necessary libraries
import scanpy as sc #software suite of tools for single-cell analysis in python
import besca as bc #internal BEDA package for single cell analysis
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import scipy
import anndata as ad
from scipy.sparse import csr_matrix
import scanpy.external as sce
from harmony import harmonize
import umap.umap_ as umap
import os
from scipy import io
# Record the installed anndata version in the notebook output.
anndata_version = ad.__version__
print(anndata_version)
# Scanpy log level — errors (0), warnings (1), info (2), hints (3).
sc.settings.verbosity = 3
INFO:torch.distributed.nn.jit.instantiator:Created a temporary directory at /tmp/tmpc_pipj5_ INFO:torch.distributed.nn.jit.instantiator:Writing /tmp/tmpc_pipj5_/_remote_module_non_scriptable.py INFO:lightning_fabric.utilities.seed:Global seed set to 0
0.9.1
pbmcsarc1: SAM24412250-Sarcoidosis_Donor1_PBMC-male-57yrs-white from Genentech (10x Genomics Chromium v3.1 3’ NovaSeq 6000)
pbmcsarc2: SAM24412252 Sarcoidosis_Donor2_PBMC: male-35yrs-South Asian sequenced by Genentech (10x Genomics Chromium v3.1 3’ NovaSeq 6000)
pbmcsarc3: SAM24412254 Sarcoidosis_Donor3_PBMC: female-60yrs-white sequenced by Genentech (10x Genomics Chromium v3.1 3’ NovaSeq 6000)
pbmchealthy1: SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_h1: healthy female-19yrs from 10x Genomics database (10x Genomics Chromium v3.1 3’ NovaSeq 6000). For more info link
pbmchealthy2: 5k_pbmc_v3_nextgem_fastqs_h2 from 10x Genomics database a healthy donor (gender not specified) (10x Genomics Chromium v3.1 3’ NovaSeq 6000). For more info Link
pbmchealthy3: 3p_Citrate_CPT_fastqs_h3: Healthy female from 10x Genomics database (10x Genomics Chromium v3.1 3’ NovaSeq 6000). For more info Link
pbmchealthy4: 10k_PBMC_3p_nextgem_Chromium_X_fastqs_h4: Healthy female-25-30yrs (10x Genomics Chromium v3.1 3’ NovaSeq 6000). For more info Link
# Cell Ranger filtered feature-barcode matrix directories, listed in the order
# the samples are unpacked below: three sarcoidosis PBMC datasets followed by
# four healthy PBMC controls from the 10x library.
matrix_dirs = [
    # Disease PBMC datasets 1-3 (sarcoidosis)
    '/raid02/Data-live/tjana/LIB5455299_SAM24412250/outs/filtered_feature_bc_matrix/',
    '/raid02/Data-live/tjana/LIB5455301_SAM24412252/outs/filtered_feature_bc_matrix/',
    '/raid02/Data-live/tjana/LIB5455303_SAM24412254/outs/filtered_feature_bc_matrix/',
    # Healthy PBMC controls 1-4 (10x library)
    '/raid02/Data-live/tjana/multi/SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_h1/outs/per_sample_outs/PBMCs_human_2/count/sample_filtered_feature_bc_matrix/',
    '/raid02/Data-live/tjana/5k_pbmc_v3_nextgem_fastqs_h2/outs/filtered_feature_bc_matrix/',
    '/raid02/Data-live/tjana/3p_Citrate_CPT_fastqs_h3/outs/filtered_feature_bc_matrix/',
    '/raid02/Data-live/tjana/10k_PBMC_3p_nextgem_Chromium_X_fastqs_h4/outs/filtered_feature_bc_matrix/',
]

# Read each matrix with gene symbols as variable names. cache=True writes an
# h5ad cache file so that subsequent reads are faster.
(
    pbmcsarc1,
    pbmcsarc2,
    pbmcsarc3,
    pbmchealthy1,
    pbmchealthy2,
    pbmchealthy3,
    pbmchealthy4,
) = [
    sc.read_10x_mtx(matrix_dir, var_names='gene_symbols', cache=True)
    for matrix_dir in matrix_dirs
]
... writing an h5ad cache file to speedup reading next time ... writing an h5ad cache file to speedup reading next time ... writing an h5ad cache file to speedup reading next time ... writing an h5ad cache file to speedup reading next time ... writing an h5ad cache file to speedup reading next time ... writing an h5ad cache file to speedup reading next time ... writing an h5ad cache file to speedup reading next time
# Deduplicate the variable (gene) names of every dataset in place.
for sample in (pbmcsarc1, pbmcsarc2, pbmcsarc3,
               pbmchealthy1, pbmchealthy2, pbmchealthy3, pbmchealthy4):
    sample.var_names_make_unique()
# Show the AnnData summary (n_obs x n_vars and annotation columns) of each
# loaded dataset, in sample order.
for dataset in (pbmcsarc1, pbmcsarc2, pbmcsarc3,
                pbmchealthy1, pbmchealthy2, pbmchealthy3, pbmchealthy4):
    display(dataset)
AnnData object with n_obs × n_vars = 7438 × 36601
var: 'gene_ids', 'feature_types'
AnnData object with n_obs × n_vars = 10029 × 36601
var: 'gene_ids', 'feature_types'
AnnData object with n_obs × n_vars = 8754 × 36601
var: 'gene_ids', 'feature_types'
AnnData object with n_obs × n_vars = 6093 × 36601
var: 'gene_ids', 'feature_types'
AnnData object with n_obs × n_vars = 5184 × 36601
var: 'gene_ids', 'feature_types'
AnnData object with n_obs × n_vars = 3958 × 36601
var: 'gene_ids', 'feature_types'
AnnData object with n_obs × n_vars = 11999 × 36601
var: 'gene_ids', 'feature_types'
# Plot the 20 highest-expressed genes of each dataset, one figure per dataset.
print("")
for dataset in (pbmcsarc1, pbmcsarc2, pbmcsarc3,
                pbmchealthy1, pbmchealthy2, pbmchealthy3, pbmchealthy4):
    sc.pl.highest_expr_genes(dataset, n_top=20)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
from tabulate import tabulate
from termcolor import colored

# Hand-entered top-20 genes per sample: first row is the header, then one row
# per sample (sample name followed by the genes at rank 1..20).
# NOTE(review): 'RPL1' in the pbmcsarc1 row may be a typo for 'RPLP1' (the
# other samples list RPLP1) — confirm against the plot before relying on it.
table = [
    ['Samples', 'Gene1', 'Gene2', 'Gene3', 'Gene4', 'Gene5', 'Gene6', 'Gene7', 'Gene8', 'Gene9', 'Gene10', 'Gene11', 'Gene12', 'Gene13', 'Gene14', 'Gene15', 'Gene16', 'Gene17', 'Gene18', 'Gene19', 'Gene20'],
    ['pbmcsarc1', 'MALAT1', 'HBB', 'HBA2', 'MT-ATP6', 'MT-CO3', 'MT-CO1', 'B2M', 'MT-CO2', 'RPL13', 'TMSB4X', 'MT-ND3', 'EEF1A1', 'RPL1', 'MT-CYB', 'RPL10', 'TPT1', 'MT-ND4', 'RPL41', 'RPS27', 'LYZ'],
    ['pbmcsarc2', 'MALAT1', 'MT-CO1', 'MT-ATP6', 'B2M', 'RPS27', 'MT-CO3', 'MT-CO2', 'MT-ND3', 'TMSB4X', 'RPL13', 'RPS29', 'EEF1A1', 'RPLP1', 'RPL41', 'RPL10', 'HBB', 'RPS12', 'TPT1', 'LYZ', 'MT-CYB'],
    ['pbmcsarc3', 'MALAT1', 'HBB', 'MT-ATP6', 'MT-CO3', 'MT-CO1', 'MT-CO2', 'B2M', 'RPL13', 'RPS27', 'RPLP1', 'RPL41', 'RPL10', 'TMSB4X', 'MT-ND3', 'RPS12', 'EEF1A1', 'TPT1', 'RPS18', 'MT-ND4', 'MT-CYB'],
    ['pbmchealthy1', 'MALAT1', 'MT-CO1', 'MT-CO2', 'MT-CO3', 'MT-ATP6', 'B2M', 'RPS27', 'MT-ND3', 'EEF1A1', 'RPL41', 'RPL13', 'RPS12', 'RPL10', 'MT-ND4', 'MT-CYB', 'TPT1', 'TMSB4X', 'RPL34', 'RPL32', 'RPLP1'],
    ['pbmchealthy2', 'MALAT1', 'MT-CO1', 'MT-CO2', 'MT-CO3', 'MT-ATP6', 'MT-ND4', 'EEF1A1', 'B2M', 'MT-CYB', 'RPL10', 'RPS12', 'RPL13', 'RPL41', 'RPLP1', 'TPT1', 'TMSB4X', 'RPS27', 'MT-ND3', 'RPL32', 'MT-ND1'],
    ['pbmchealthy3', 'MALAT1', 'HBB', 'MT-CO3', 'MT-CO2', 'MT-ATP6', 'MT-CO1', 'MT-CYB', 'MT-ND4', 'B2M', 'HBA2', 'MT-ND1', 'RPL10', 'EEF1A1', 'RPS12', 'RPL13', 'TMSB4X', 'MT-ND3', 'TPT1', 'RPL41', 'RPS27'],
    ['pbmchealthy4', 'MALAT1', 'MT-CO1', 'MT-ATP6', 'B2M', 'MT-CO2', 'MT-CO3', 'EEF1A1', 'TMSB4X', 'RPL13', 'TPT1', 'MT-ND3', 'RPL10', 'RPS12', 'RPS27', 'MT-CYB', 'RPL41', 'MT-ND4', 'S100A9', 'RPS18', 'RPL34'],
]

# Right-pad shorter rows with empty strings so every row is as long as the
# longest one.
max_length = max(map(len, table))
for row in table:
    row.extend([''] * (max_length - len(row)))

headers, rows = table[0], table[1:]

# Mark each gene column (column 0 is the sample name and is skipped): when all
# samples carry the same gene at a rank it is rendered green as [GENE],
# otherwise every cell of that column is rendered red as {GENE}.
for j in range(1, len(rows[0])):
    is_common = len({row[j] for row in rows}) == 1
    for row in rows:
        gene = row[j]
        row[j] = colored(f'[{gene}]', 'green') if is_common else colored(f'{{{gene}}}', 'red')

print(tabulate(rows, headers=headers, tablefmt="grid"))
+--------------+----------+----------+-----------+-----------+-----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+ | Samples | Gene1 | Gene2 | Gene3 | Gene4 | Gene5 | Gene6 | Gene7 | Gene8 | Gene9 | Gene10 | Gene11 | Gene12 | Gene13 | Gene14 | Gene15 | Gene16 | Gene17 | Gene18 | Gene19 | Gene20 | +==============+==========+==========+===========+===========+===========+==========+==========+==========+==========+==========+==========+==========+==========+==========+==========+==========+==========+==========+==========+==========+ | pbmcsarc1 | [MALAT1] | {HBB} | {HBA2} | {MT-ATP6} | {MT-CO3} | {MT-CO1} | {B2M} | {MT-CO2} | {RPL13} | {TMSB4X} | {MT-ND3} | {EEF1A1} | {RPL1} | {MT-CYB} | {RPL10} | {TPT1} | {MT-ND4} | {RPL41} | {RPS27} | {LYZ} | +--------------+----------+----------+-----------+-----------+-----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+ | pbmcsarc2 | [MALAT1] | {MT-CO1} | {MT-ATP6} | {B2M} | {RPS27} | {MT-CO3} | {MT-CO2} | {MT-ND3} | {TMSB4X} | {RPL13} | {RPS29} | {EEF1A1} | {RPLP1} | {RPL41} | {RPL10} | {HBB} | {RPS12} | {TPT1} | {LYZ} | {MT-CYB} | +--------------+----------+----------+-----------+-----------+-----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+ | pbmcsarc3 | [MALAT1] | {HBB} | {MT-ATP6} | {MT-CO3} | {MT-CO1} | {MT-CO2} | {B2M} | {RPL13} | {RPS27} | {RPLP1} | {RPL41} | {RPL10} | {TMSB4X} | {MT-ND3} | {RPS12} | {EEF1A1} | {TPT1} | {RPS18} | {MT-ND4} | {MT-CYB} | 
+--------------+----------+----------+-----------+-----------+-----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+ | pbmchealthy1 | [MALAT1] | {MT-CO1} | {MT-CO2} | {MT-CO3} | {MT-ATP6} | {B2M} | {RPS27} | {MT-ND3} | {EEF1A1} | {RPL41} | {RPL13} | {RPS12} | {RPL10} | {MT-ND4} | {MT-CYB} | {TPT1} | {TMSB4X} | {RPL34} | {RPL32} | {RPLP1} | +--------------+----------+----------+-----------+-----------+-----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+ | pbmchealthy2 | [MALAT1] | {MT-CO1} | {MT-CO2} | {MT-CO3} | {MT-ATP6} | {MT-ND4} | {EEF1A1} | {B2M} | {MT-CYB} | {RPL10} | {RPS12} | {RPL13} | {RPL41} | {RPLP1} | {TPT1} | {TMSB4X} | {RPS27} | {MT-ND3} | {RPL32} | {MT-ND1} | +--------------+----------+----------+-----------+-----------+-----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+ | pbmchealthy3 | [MALAT1] | {HBB} | {MT-CO3} | {MT-CO2} | {MT-ATP6} | {MT-CO1} | {MT-CYB} | {MT-ND4} | {B2M} | {HBA2} | {MT-ND1} | {RPL10} | {EEF1A1} | {RPS12} | {RPL13} | {TMSB4X} | {MT-ND3} | {TPT1} | {RPL41} | {RPS27} | +--------------+----------+----------+-----------+-----------+-----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+ | pbmchealthy4 | [MALAT1] | {MT-CO1} | {MT-ATP6} | {B2M} | {MT-CO2} | {MT-CO3} | {EEF1A1} | {TMSB4X} | {RPL13} | {TPT1} | {MT-ND3} | {RPL10} | {RPS12} | {RPS27} | {MT-CYB} | {RPL41} | {MT-ND4} | {S100A9} | {RPS18} | {RPL34} | 
+--------------+----------+----------+-----------+-----------+-----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+
# Basic filtering, applied in place to every dataset: first drop cells with
# fewer than 200 expressed genes, then drop genes detected in fewer than 3 cells.
print("filtering out genes in less than 3 cells")
MIN_GENES_PER_CELL = 200
MIN_CELLS_PER_GENE = 3
for dataset in (pbmcsarc1, pbmcsarc2, pbmcsarc3,
                pbmchealthy1, pbmchealthy2, pbmchealthy3, pbmchealthy4):
    sc.pp.filter_cells(dataset, min_genes=MIN_GENES_PER_CELL)
    sc.pp.filter_genes(dataset, min_cells=MIN_CELLS_PER_GENE)
filtering out genes in less than 3 cells filtered out 381 cells that have less than 200 genes expressed filtered out 16930 genes that are detected in less than 3 cells filtered out 87 cells that have less than 200 genes expressed filtered out 16207 genes that are detected in less than 3 cells filtered out 192 cells that have less than 200 genes expressed filtered out 17692 genes that are detected in less than 3 cells filtered out 16 cells that have less than 200 genes expressed filtered out 11871 genes that are detected in less than 3 cells filtered out 41 cells that have less than 200 genes expressed filtered out 10844 genes that are detected in less than 3 cells filtered out 142 cells that have less than 200 genes expressed filtered out 14414 genes that are detected in less than 3 cells filtered out 32 cells that have less than 200 genes expressed filtered out 9238 genes that are detected in less than 3 cells
# For each dataset: flag mitochondrial and ribosomal genes, compute the QC
# metrics that use those flags, and show the pre-filter violin plots.
qc_columns = ['n_genes_by_counts', 'total_counts', 'pct_counts_mt', 'pct_counts_ribo']
for sample_no, dataset in enumerate(
    (pbmcsarc1, pbmcsarc2, pbmcsarc3,
     pbmchealthy1, pbmchealthy2, pbmchealthy3, pbmchealthy4),
    start=1,
):
    gene_names = dataset.var_names
    dataset.var['mt'] = gene_names.str.startswith('MT-')  # mitochondrial genes: 'MT-' prefix
    dataset.var['ribo'] = gene_names.str.startswith(("RPS", "RPL"))  # ribosomal genes: 'RPS'/'RPL' prefix
    sc.pp.calculate_qc_metrics(dataset, qc_vars=['mt', 'ribo'], percent_top=None, log1p=False, inplace=True)
    display(f"sample no{sample_no}")
    sc.pl.violin(dataset, qc_columns, jitter=0.4, multi_panel=True)
'sample no1'
... storing 'feature_types' as categorical
'sample no2'
... storing 'feature_types' as categorical
'sample no3'
... storing 'feature_types' as categorical
'sample no4'
... storing 'feature_types' as categorical
'sample no5'
... storing 'feature_types' as categorical
'sample no6'
... storing 'feature_types' as categorical
'sample no7'
... storing 'feature_types' as categorical
def _filter_by_qc(adata, max_genes, max_counts, max_pct_mt, max_pct_ribo):
    """Return a copy of *adata* restricted to cells below all four QC ceilings.

    A cell is kept only if every condition holds (strict ``<``):

    * ``n_genes_by_counts < max_genes``   - number of genes expressed in the cell
    * ``total_counts      < max_counts``  - total counts of the cell
    * ``pct_counts_mt     < max_pct_mt``  - percentage of counts in mitochondrial genes
    * ``pct_counts_ribo   < max_pct_ribo``- percentage of counts in ribosomal genes

    The four conditions are combined into a single boolean mask so the data is
    subset once, and ``.copy()`` makes the result a standalone AnnData rather
    than a view, so later in-place preprocessing does not have to copy it
    implicitly.
    """
    obs = adata.obs
    keep = (
        (obs.n_genes_by_counts < max_genes)
        & (obs.total_counts < max_counts)
        & (obs.pct_counts_mt < max_pct_mt)
        & (obs.pct_counts_ribo < max_pct_ribo)
    )
    return adata[keep, :].copy()


# Filter cells based on QC metrics; the ceilings are chosen per sample.
pbmcsarc1 = _filter_by_qc(pbmcsarc1, max_genes=6000, max_counts=30000, max_pct_mt=19, max_pct_ribo=60)
pbmcsarc2 = _filter_by_qc(pbmcsarc2, max_genes=7000, max_counts=30000, max_pct_mt=15, max_pct_ribo=60)
pbmcsarc3 = _filter_by_qc(pbmcsarc3, max_genes=5000, max_counts=15000, max_pct_mt=15, max_pct_ribo=60)
pbmchealthy1 = _filter_by_qc(pbmchealthy1, max_genes=8000, max_counts=40000, max_pct_mt=15, max_pct_ribo=50)
pbmchealthy2 = _filter_by_qc(pbmchealthy2, max_genes=8000, max_counts=50000, max_pct_mt=15, max_pct_ribo=50)
pbmchealthy3 = _filter_by_qc(pbmchealthy3, max_genes=7000, max_counts=40000, max_pct_mt=15, max_pct_ribo=50)
pbmchealthy4 = _filter_by_qc(pbmchealthy4, max_genes=8000, max_counts=50000, max_pct_mt=15, max_pct_ribo=50)
# Post-filter QC violin plots: print a heading, then plot the four QC metrics,
# for each sample in turn.
qc_columns = ['n_genes_by_counts', 'total_counts', 'pct_counts_mt', 'pct_counts_ribo']
post_qc_panels = [
    ("PostQC for First Sample: pbmcsarc1", pbmcsarc1),
    ("PostQC for Second Sample: pbmcsarc2", pbmcsarc2),
    ("PostQC for Third Sample: pbmcsarc3", pbmcsarc3),
    ("PostQC for Fourth Sample: pbmchealthy1", pbmchealthy1),
    ("PostQC for Fifth Sample: pbmchealthy2", pbmchealthy2),
    ("PostQC for sixth Sample: pbmchealthy3", pbmchealthy3),
    ("PostQC for seventh Sample: pbmchealthy4", pbmchealthy4),
]
for heading, dataset in post_qc_panels:
    print(heading)
    sc.pl.violin(dataset, qc_columns, jitter=0.4, multi_panel=True)
PostQC for First Sample: pbmcsarc1
PostQC for Second Sample: pbmcsarc2
PostQC for Third Sample: pbmcsarc3
PostQC for Fourth Sample: pbmchealthy1
PostQC for Fifth Sample: pbmchealthy2
PostQC for sixth Sample: pbmchealthy3
PostQC for seventh Sample: pbmchealthy4
# Library-size normalization: scale the counts of every cell so that they sum
# to 10,000 (target_sum=1e4). Applied in place to all seven datasets.
adata_list = [
    pbmcsarc1, pbmcsarc2, pbmcsarc3,
    pbmchealthy1, pbmchealthy2, pbmchealthy3, pbmchealthy4,
]
TARGET_SUM = 1e4
for adata in adata_list:
    sc.pp.normalize_total(adata, target_sum=TARGET_SUM)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/preprocessing/_normalization.py:170: UserWarning: Received a view of an AnnData. Making a copy.
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
normalizing counts per cell
finished (0:00:00)
# Log-transform every dataset in place: X = log(X + 1), natural logarithm.
adata_list = [
    pbmcsarc1, pbmcsarc2, pbmcsarc3,
    pbmchealthy1, pbmchealthy2, pbmchealthy3, pbmchealthy4,
]
for adata in adata_list:
    sc.pp.log1p(adata)
# Annotate highly variable genes in every dataset, using the same mean and
# dispersion cut-offs for all samples. The results are added to adata.var
# ('highly_variable', 'means', 'dispersions', 'dispersions_norm').
adata_list = [
    pbmcsarc1, pbmcsarc2, pbmcsarc3,
    pbmchealthy1, pbmchealthy2, pbmchealthy3, pbmchealthy4,
]
hvg_cutoffs = dict(min_mean=0.0125, max_mean=3, min_disp=0.5)
for adata in adata_list:
    sc.pp.highly_variable_genes(adata, **hvg_cutoffs)
extracting highly variable genes
finished (0:00:01)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
extracting highly variable genes
finished (0:00:00)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
extracting highly variable genes
finished (0:00:00)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
extracting highly variable genes
finished (0:00:01)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
extracting highly variable genes
finished (0:00:01)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
extracting highly variable genes
finished (0:00:00)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
extracting highly variable genes
finished (0:00:02)
--> added
'highly_variable', boolean vector (adata.var)
'means', float vector (adata.var)
'dispersions', float vector (adata.var)
'dispersions_norm', float vector (adata.var)
# Plot the highly-variable-gene selection of each dataset, one figure per
# dataset.
adata_list = [
    pbmcsarc1, pbmcsarc2, pbmcsarc3,
    pbmchealthy1, pbmchealthy2, pbmchealthy3, pbmchealthy4,
]
for adata in adata_list:
    sc.pl.highly_variable_genes(adata)
# Store the current state of each dataset in its .raw attribute.
adata_list = [
    pbmcsarc1, pbmcsarc2, pbmcsarc3,
    pbmchealthy1, pbmchealthy2, pbmchealthy3, pbmchealthy4,
]
for adata in adata_list:
    adata.raw = adata
# Restrict every dataset to its highly variable genes.
#
# Assigning the subset to a loop variable would only rebind that variable and
# leave the datasets untouched, so the sample names themselves are rebound
# here. `.copy()` turns each subset view into a standalone AnnData, so later
# in-place steps operate on the reduced matrix.
# NOTE(review): any later step that expects the full gene set in .X will now
# see only the highly variable genes — confirm against downstream cells.
adata_list = [pbmcsarc1, pbmcsarc2, pbmcsarc3, pbmchealthy1, pbmchealthy2, pbmchealthy3, pbmchealthy4]
adata_list = [adata[:, adata.var.highly_variable].copy() for adata in adata_list]
(
    pbmcsarc1,
    pbmcsarc2,
    pbmcsarc3,
    pbmchealthy1,
    pbmchealthy2,
    pbmchealthy3,
    pbmchealthy4,
) = adata_list
# Regress out, per dataset and in place, the effects of total counts per cell
# and of the percentages of mitochondrial and ribosomal counts.
adata_list = [
    pbmcsarc1, pbmcsarc2, pbmcsarc3,
    pbmchealthy1, pbmchealthy2, pbmchealthy3, pbmchealthy4,
]
covariates = ['total_counts', 'pct_counts_mt', 'pct_counts_ribo']
for adata in adata_list:
    sc.pp.regress_out(adata, covariates)
regressing out ['total_counts', 'pct_counts_mt', 'pct_counts_ribo']
sparse input is densified and may lead to high memory use
finished (0:07:03)
regressing out ['total_counts', 'pct_counts_mt', 'pct_counts_ribo']
sparse input is densified and may lead to high memory use
finished (0:09:15)
regressing out ['total_counts', 'pct_counts_mt', 'pct_counts_ribo']
sparse input is densified and may lead to high memory use
finished (0:07:28)
regressing out ['total_counts', 'pct_counts_mt', 'pct_counts_ribo']
sparse input is densified and may lead to high memory use
finished (0:08:16)
regressing out ['total_counts', 'pct_counts_mt', 'pct_counts_ribo']
sparse input is densified and may lead to high memory use
finished (0:08:04)
regressing out ['total_counts', 'pct_counts_mt', 'pct_counts_ribo']
sparse input is densified and may lead to high memory use
finished (0:06:03)
regressing out ['total_counts', 'pct_counts_mt', 'pct_counts_ribo']
sparse input is densified and may lead to high memory use
finished (0:15:05)
# Scale every gene to unit variance in each dataset, capping values at 10
# (max_value=10).
adata_list = [
    pbmcsarc1, pbmcsarc2, pbmcsarc3,
    pbmchealthy1, pbmchealthy2, pbmchealthy3, pbmchealthy4,
]
MAX_SCALED_VALUE = 10
for adata in adata_list:
    sc.pp.scale(adata, max_value=MAX_SCALED_VALUE)
import copy  # not needed by this cell any more; kept in case later cells use it
import matplotlib.pyplot as plt

# Explained-variance diagnostics for choosing the number of principal
# components, computed on throw-away copies so the datasets themselves do not
# receive PCA results here.
adata_list = [pbmcsarc1, pbmcsarc2, pbmcsarc3, pbmchealthy1, pbmchealthy2, pbmchealthy3, pbmchealthy4]
MAX_PCS = 51

# One PCA with MAX_PCS components per dataset is enough: the variance ratio of
# the leading k components does not depend on how many further components are
# requested (up to solver tolerance), so re-running PCA for every k in
# 1..MAX_PCS only repeats the same work.
full_variance_ratios = []
for adata in adata_list:
    adata_temp = adata.copy()  # work on a copy; only one copy is alive at a time
    sc.tl.pca(adata_temp, n_comps=MAX_PCS, svd_solver='arpack')
    full_variance_ratios.append(adata_temp.uns['pca']['variance_ratio'])
    del adata_temp  # release the copy before the next dataset is processed

# Same layout as before: variance_ratios[n_pcs - 1][dataset_index] holds the
# variance ratios of the first n_pcs components of that dataset.
variance_ratios = [
    [ratio[:n_pcs] for ratio in full_variance_ratios]
    for n_pcs in range(1, MAX_PCS + 1)
]

# Plot the explained variance ratio for each PC, one curve per dataset.
plt.figure(figsize=(10, 6))
for idx, var_ratio in enumerate(full_variance_ratios, 1):
    plt.plot(range(1, len(var_ratio) + 1), var_ratio, marker='o', label=f'dataset={idx}')
plt.xlabel('Number of PCs')
plt.ylabel('Explained Variance Ratio')
plt.title('Explained Variance Ratio for Each PC')
plt.legend()
plt.show()
computing PCA
on highly variable genes
with n_comps=1
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=1
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=1
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=1
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=1
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=1
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=1
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=2
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=2
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=2
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=2
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=2
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=2
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=2
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=3
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=3
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=3
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=3
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=3
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=3
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=3
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=4
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=4
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=4
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=4
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=4
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=4
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=4
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=5
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=5
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=5
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=5
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=5
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=5
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=5
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=6
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=6
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=6
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=6
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=6
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=6
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=6
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=7
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=7
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=7
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=7
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=7
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=7
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=7
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=8
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=8
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=8
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=8
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=8
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=8
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=8
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=9
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=9
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=9
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=9
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=9
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=9
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=9
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=10
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=10
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=10
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=10
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=10
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=10
finished (0:00:00)
computing PCA
on highly variable genes
with n_comps=10
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=11
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=11
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=11
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=11
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=11
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=11
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=11
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=12
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=12
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=12
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=12
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=12
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=12
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=12
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=13
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=13
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=13
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=13
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=13
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=13
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=13
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=14
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=14
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=14
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=14
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=14
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=14
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=14
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=15
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=15
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=15
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=15
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=15
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=15
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=15
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=16
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=16
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=16
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=16
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=16
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=16
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=16
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=17
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=17
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=17
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=17
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=17
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=17
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=17
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=18
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=18
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=18
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=18
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=18
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=18
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=18
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=19
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=19
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=19
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=19
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=19
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=19
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=19
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=21
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=21
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=21
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=21
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=21
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=21
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=21
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=22
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=22
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=22
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=22
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=22
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=22
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=22
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=23
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=23
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=23
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=23
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=23
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=23
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=23
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=24
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=24
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=24
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=24
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=24
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=24
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=24
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=25
finished (0:00:01)
computing PCA
on highly variable genes
with n_comps=25
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=25
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=25
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=25
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=25
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=25
finished (0:00:07)
computing PCA
on highly variable genes
with n_comps=26
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=26
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=26
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=26
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=26
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=26
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=26
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=27
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=27
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=27
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=27
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=27
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=27
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=27
finished (0:00:07)
computing PCA
on highly variable genes
with n_comps=28
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=28
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=28
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=28
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=28
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=28
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=28
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=29
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=29
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=29
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=29
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=29
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=29
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=29
finished (0:00:07)
computing PCA
on highly variable genes
with n_comps=30
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=30
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=30
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=30
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=30
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=30
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=30
finished (0:00:07)
computing PCA
on highly variable genes
with n_comps=31
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=31
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=31
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=31
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=31
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=31
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=31
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=32
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=32
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=32
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=32
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=32
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=32
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=32
finished (0:00:08)
computing PCA
on highly variable genes
with n_comps=33
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=33
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=33
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=33
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=33
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=33
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=33
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=34
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=34
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=34
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=34
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=34
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=34
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=34
finished (0:00:08)
computing PCA
on highly variable genes
with n_comps=35
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=35
finished (0:00:07)
computing PCA
on highly variable genes
with n_comps=35
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=35
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=35
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=35
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=35
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=36
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=36
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=36
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=36
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=36
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=36
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=36
finished (0:00:08)
computing PCA
on highly variable genes
with n_comps=37
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=37
finished (0:00:07)
computing PCA
on highly variable genes
with n_comps=37
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=37
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=37
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=37
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=37
finished (0:00:08)
computing PCA
on highly variable genes
with n_comps=38
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=38
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=38
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=38
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=38
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=38
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=38
finished (0:00:07)
computing PCA
on highly variable genes
with n_comps=39
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=39
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=39
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=39
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=39
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=39
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=39
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=40
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=40
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=40
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=40
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=40
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=40
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=40
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=41
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=41
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=41
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=41
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=41
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=41
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=41
finished (0:00:09)
computing PCA
on highly variable genes
with n_comps=42
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=42
finished (0:00:07)
computing PCA
on highly variable genes
with n_comps=42
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=42
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=42
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=42
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=42
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=43
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=43
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=43
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=43
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=43
finished (0:00:08)
computing PCA
on highly variable genes
with n_comps=43
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=43
finished (0:00:09)
computing PCA
on highly variable genes
with n_comps=44
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=44
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=44
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=44
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=44
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=44
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=44
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=45
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=45
finished (0:00:07)
computing PCA
on highly variable genes
with n_comps=45
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=45
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=45
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=45
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=45
finished (0:00:11)
computing PCA
on highly variable genes
with n_comps=46
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=46
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=46
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=46
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=46
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=46
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=46
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=47
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=47
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=47
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=47
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=47
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=47
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=47
finished (0:00:07)
computing PCA
on highly variable genes
with n_comps=48
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=48
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=48
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=48
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=48
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=48
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=48
finished (0:00:08)
computing PCA
on highly variable genes
with n_comps=49
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=49
finished (0:00:08)
computing PCA
on highly variable genes
with n_comps=49
finished (0:00:07)
computing PCA
on highly variable genes
with n_comps=49
finished (0:00:07)
computing PCA
on highly variable genes
with n_comps=49
finished (0:00:07)
computing PCA
on highly variable genes
with n_comps=49
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=49
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=50
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=50
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=50
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=50
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=50
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=50
finished (0:00:02)
computing PCA
on highly variable genes
with n_comps=50
finished (0:00:09)
computing PCA
on highly variable genes
with n_comps=51
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=51
finished (0:00:08)
computing PCA
on highly variable genes
with n_comps=51
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=51
finished (0:00:07)
computing PCA
on highly variable genes
with n_comps=51
finished (0:00:05)
computing PCA
on highly variable genes
with n_comps=51
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=51
finished (0:00:07)
# Final PCA pass: compute 20 principal components for every sample.
adata_list = [
    pbmcsarc1, pbmcsarc2, pbmcsarc3,
    pbmchealthy1, pbmchealthy2, pbmchealthy3, pbmchealthy4,
]
for adata in adata_list:
    # Results are stored on the object itself; the summaries printed further
    # down list them as obsm['X_pca'] and varm['PCs'].
    sc.tl.pca(adata, n_comps=20, svd_solver="arpack")
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:06)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:04)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:03)
computing PCA
on highly variable genes
with n_comps=20
finished (0:00:09)
# Scatter plots in PCA coordinates, one panel per marker gene, for each sample.
adata_list = [
    pbmcsarc1, pbmcsarc2, pbmcsarc3,
    pbmchealthy1, pbmchealthy2, pbmchealthy3, pbmchealthy4,
]
pca_marker_genes = ["CD14", "CD79A", "CD3D", "FCER1A", "NKG7", "CST3"]
for adata in adata_list:
    sc.pl.pca(adata, color=pca_marker_genes)
# Legend for the panels above, printed once after all samples are drawn.
# NOTE(review): FCER1A is usually cited as a dendritic-cell marker (FCGR3A is
# the common CD16+ monocyte marker) and CD3D as a pan-T-cell marker; confirm
# the labels in this string before relying on them.
print("CD14: CD14+ Monocytes, CD79A: B cell, CD3D : CD4+ T cell, FCER1A: CD16+ Monocyte, NKG7: NK cell, CST3: Dendritic cells")
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead.
CD14: CD14+ Monocytes, CD79A: B cell, CD3D : CD4+ T cell, FCER1A: CD16+ Monocyte, NKG7: NK cell, CST3: Dendritic cells
import os
from scipy import io

# Samples to checkpoint, in the same order as the target paths below.
adata_list = [
    pbmcsarc1, pbmcsarc2, pbmcsarc3,
    pbmchealthy1, pbmchealthy2, pbmchealthy3, pbmchealthy4,
]
# One .h5ad file per sample; the reload step further down reads these exact
# paths, so the two lists must stay in sync.
save_files = [
    "/home/jana/pbmcsarc1.h5ad",
    "/home/jana/pbmcsarc2.h5ad",
    "/home/jana/pbmcsarc3.h5ad",
    "/home/jana/pbmchealth1.h5ad",
    "/home/jana/pbmchealth2.h5ad",
    "/home/jana/pbmchealth3.h5ad",
    "/home/jana/pbmchealth4.h5ad",
]
# Write every sample to its matching file.
for adata, save_file in zip(adata_list, save_files):
    adata.write_h5ad(save_file)
Deleting individual datasets to save space
# Deleting individual datasets to save space
import gc

del pbmcsarc1, pbmcsarc2, pbmcsarc3, pbmchealthy1, pbmchealthy2, pbmchealthy3, pbmchealthy4
# `del` only unbinds the seven names. `adata_list` and the leftover loop
# variable `adata` from the preceding cells still reference the same objects,
# so they are rebound here; otherwise the old copies would stay alive while
# the files are read back in.
adata_list = []
adata = None
# Run a collection pass so any objects kept alive only by reference cycles
# are reclaimed now rather than at some later point.
gc.collect()
# Reload the annotated data objects that were last written to disk in h5ad
# format. They are bound to the same variable names as before so the analysis
# can continue as in the previous steps.
file_paths = [
    "/home/jana/pbmcsarc1.h5ad",
    "/home/jana/pbmcsarc2.h5ad",
    "/home/jana/pbmcsarc3.h5ad",
    "/home/jana/pbmchealth1.h5ad",
    "/home/jana/pbmchealth2.h5ad",
    "/home/jana/pbmchealth3.h5ad",
    "/home/jana/pbmchealth4.h5ad",
]
# Read the files in list order, keeping one loaded object per path.
data_objects = [sc.read_h5ad(file_path) for file_path in file_paths]
# Unpack positionally: the order of file_paths determines which sample each
# name receives.
(
    pbmcsarc1,
    pbmcsarc2,
    pbmcsarc3,
    pbmchealthy1,
    pbmchealthy2,
    pbmchealthy3,
    pbmchealthy4,
) = data_objects
Displaying all samples in this workspace
# Print the AnnData summary (dimensions and stored annotations) of each sample.
adata_list = [
    pbmcsarc1, pbmcsarc2, pbmcsarc3,
    pbmchealthy1, pbmchealthy2, pbmchealthy3, pbmchealthy4,
]
for adata in adata_list:
    print(adata)
AnnData object with n_obs × n_vars = 6962 × 19671
obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca'
obsm: 'X_pca'
varm: 'PCs'
AnnData object with n_obs × n_vars = 9779 × 20394
obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca'
obsm: 'X_pca'
varm: 'PCs'
AnnData object with n_obs × n_vars = 8324 × 18909
obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca'
obsm: 'X_pca'
varm: 'PCs'
AnnData object with n_obs × n_vars = 5921 × 24730
obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca'
obsm: 'X_pca'
varm: 'PCs'
AnnData object with n_obs × n_vars = 4881 × 25757
obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca'
obsm: 'X_pca'
varm: 'PCs'
AnnData object with n_obs × n_vars = 3733 × 22187
obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca'
obsm: 'X_pca'
varm: 'PCs'
AnnData object with n_obs × n_vars = 11808 × 27363
obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca'
obsm: 'X_pca'
varm: 'PCs'
datasets = [
    pbmcsarc1, pbmcsarc2, pbmcsarc3,
    pbmchealthy1, pbmchealthy2, pbmchealthy3, pbmchealthy4,
]

# Neighbourhood graph (10 neighbours, first 20 PCs) followed by the UMAP
# embedding, for every dataset.
for dataset in datasets:
    sc.pp.neighbors(dataset, n_neighbors=10, n_pcs=20)
    sc.tl.umap(dataset)

# Leiden resolutions to compare, paired with the obs column that receives the
# resulting cluster labels.
leiden_runs = [
    ("leiden_res0_20", 0.20),
    ("leiden_res0_40", 0.40),
    ("leiden_res0_60", 0.60),
    ("leiden_res0_80", 0.80),
    ("leiden_res1", 1.0),
]
leiden_keys = [key for key, _ in leiden_runs]

# Cluster each dataset at every resolution, then show the UMAP coloured by
# each clustering under a "sample no<N>" heading (N counts from 1).
for i, dataset in enumerate(datasets, start=1):
    # Call with default settings; the labels land in the 'leiden' column.
    sc.tl.leiden(dataset)
    for key, resolution in leiden_runs:
        sc.tl.leiden(dataset, key_added=key, resolution=resolution)
    display(f"sample no{i}")
    sc.pl.umap(dataset, color=leiden_keys, legend_loc="on data")
computing neighbors
using 'X_pca' with n_pcs = 20
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:03)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:22)
computing neighbors
using 'X_pca' with n_pcs = 20
/home/jana/my-notebook-venv/lib/python3.8/site-packages/numba/core/typed_passes.py:334: NumbaPerformanceWarning:
The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.
To find out why, try turning on parallel diagnostics, see https://numba.readthedocs.io/en/stable/user/parallel.html#diagnostics for help.
File "my-notebook-venv/lib/python3.8/site-packages/umap/nndescent.py", line 47:
@numba.njit(parallel=True)
def nn_descent(
^
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:04)
computing UMAP
/home/jana/my-notebook-venv/lib/python3.8/site-packages/umap/spectral.py:227: UserWarning: Embedding a total of 2 separate connected components using meta-embedding (experimental)
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:32)
computing neighbors
using 'X_pca' with n_pcs = 20
/home/jana/my-notebook-venv/lib/python3.8/site-packages/numba/core/typed_passes.py:334: NumbaPerformanceWarning:
The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.
To find out why, try turning on parallel diagnostics, see https://numba.readthedocs.io/en/stable/user/parallel.html#diagnostics for help.
File "my-notebook-venv/lib/python3.8/site-packages/umap/nndescent.py", line 47:
@numba.njit(parallel=True)
def nn_descent(
^
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:03)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:27)
computing neighbors
using 'X_pca' with n_pcs = 20
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:01)
computing UMAP
/home/jana/my-notebook-venv/lib/python3.8/site-packages/umap/spectral.py:227: UserWarning: Embedding a total of 2 separate connected components using meta-embedding (experimental)
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:18)
computing neighbors
using 'X_pca' with n_pcs = 20
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:01)
computing UMAP
/home/jana/my-notebook-venv/lib/python3.8/site-packages/umap/spectral.py:227: UserWarning: Embedding a total of 3 separate connected components using meta-embedding (experimental)
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:15)
computing neighbors
using 'X_pca' with n_pcs = 20
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
computing UMAP
/home/jana/my-notebook-venv/lib/python3.8/site-packages/umap/spectral.py:227: UserWarning: Embedding a total of 2 separate connected components using meta-embedding (experimental)
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:11)
computing neighbors
using 'X_pca' with n_pcs = 20
/home/jana/my-notebook-venv/lib/python3.8/site-packages/numba/core/typed_passes.py:334: NumbaPerformanceWarning:
The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.
To find out why, try turning on parallel diagnostics, see https://numba.readthedocs.io/en/stable/user/parallel.html#diagnostics for help.
File "my-notebook-venv/lib/python3.8/site-packages/umap/nndescent.py", line 47:
@numba.njit(parallel=True)
def nn_descent(
^
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:04)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:15)
running Leiden clustering
finished: found 23 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 11 clusters and added
'leiden_res0_20', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 15 clusters and added
'leiden_res0_40', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 18 clusters and added
'leiden_res0_60', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 21 clusters and added
'leiden_res0_80', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 23 clusters and added
'leiden_res1', the cluster labels (adata.obs, categorical) (0:00:00)
'sample no1'
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
running Leiden clustering
finished: found 22 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:01)
running Leiden clustering
finished: found 10 clusters and added
'leiden_res0_20', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 14 clusters and added
'leiden_res0_40', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 16 clusters and added
'leiden_res0_60', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 20 clusters and added
'leiden_res0_80', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 22 clusters and added
'leiden_res1', the cluster labels (adata.obs, categorical) (0:00:01)
'sample no2'
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
running Leiden clustering
finished: found 21 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:01)
running Leiden clustering
finished: found 11 clusters and added
'leiden_res0_20', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 12 clusters and added
'leiden_res0_40', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 16 clusters and added
'leiden_res0_60', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 20 clusters and added
'leiden_res0_80', the cluster labels (adata.obs, categorical) (0:00:01)
running Leiden clustering
finished: found 21 clusters and added
'leiden_res1', the cluster labels (adata.obs, categorical) (0:00:01)
'sample no3'
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
running Leiden clustering
finished: found 22 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 9 clusters and added
'leiden_res0_20', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 13 clusters and added
'leiden_res0_40', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 18 clusters and added
'leiden_res0_60', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 22 clusters and added
'leiden_res0_80', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 22 clusters and added
'leiden_res1', the cluster labels (adata.obs, categorical) (0:00:00)
'sample no4'
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
running Leiden clustering
finished: found 24 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 11 clusters and added
'leiden_res0_20', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 14 clusters and added
'leiden_res0_40', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 17 clusters and added
'leiden_res0_60', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 21 clusters and added
'leiden_res0_80', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 24 clusters and added
'leiden_res1', the cluster labels (adata.obs, categorical) (0:00:00)
'sample no5'
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
running Leiden clustering
finished: found 23 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 14 clusters and added
'leiden_res0_20', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 15 clusters and added
'leiden_res0_40', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 18 clusters and added
'leiden_res0_60', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 21 clusters and added
'leiden_res0_80', the cluster labels (adata.obs, categorical) (0:00:00)
running Leiden clustering
finished: found 23 clusters and added
'leiden_res1', the cluster labels (adata.obs, categorical) (0:00:00)
'sample no6'
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
running Leiden clustering
finished: found 27 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:01)
running Leiden clustering
finished: found 13 clusters and added
'leiden_res0_20', the cluster labels (adata.obs, categorical) (0:00:01)
running Leiden clustering
finished: found 17 clusters and added
'leiden_res0_40', the cluster labels (adata.obs, categorical) (0:00:01)
running Leiden clustering
finished: found 20 clusters and added
'leiden_res0_60', the cluster labels (adata.obs, categorical) (0:00:01)
running Leiden clustering
finished: found 24 clusters and added
'leiden_res0_80', the cluster labels (adata.obs, categorical) (0:00:02)
running Leiden clustering
finished: found 27 clusters and added
'leiden_res1', the cluster labels (adata.obs, categorical) (0:00:01)
'sample no7'
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
Displaying all samples in this workspace
# Collect the seven processed samples (three sarcoidosis, four healthy)
# and print the AnnData summary of each one in turn.
adata_list = [
    pbmcsarc1,
    pbmcsarc2,
    pbmcsarc3,
    pbmchealthy1,
    pbmchealthy2,
    pbmchealthy3,
    pbmchealthy4,
]
for adata in adata_list:
    print(adata)
AnnData object with n_obs × n_vars = 6962 × 19671
obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca', 'neighbors', 'leiden', 'umap', 'leiden_res0_20_colors', 'leiden_res0_40_colors', 'leiden_res0_60_colors', 'leiden_res0_80_colors', 'leiden_res1_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
AnnData object with n_obs × n_vars = 9779 × 20394
obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_res0_20_colors', 'leiden_res0_40_colors', 'leiden_res0_60_colors', 'leiden_res0_80_colors', 'leiden_res1_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
AnnData object with n_obs × n_vars = 8324 × 18909
obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_res0_20_colors', 'leiden_res0_40_colors', 'leiden_res0_60_colors', 'leiden_res0_80_colors', 'leiden_res1_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
AnnData object with n_obs × n_vars = 5921 × 24730
obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_res0_20_colors', 'leiden_res0_40_colors', 'leiden_res0_60_colors', 'leiden_res0_80_colors', 'leiden_res1_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
AnnData object with n_obs × n_vars = 4881 × 25757
obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_res0_20_colors', 'leiden_res0_40_colors', 'leiden_res0_60_colors', 'leiden_res0_80_colors', 'leiden_res1_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
AnnData object with n_obs × n_vars = 3733 × 22187
obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_res0_20_colors', 'leiden_res0_40_colors', 'leiden_res0_60_colors', 'leiden_res0_80_colors', 'leiden_res1_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
AnnData object with n_obs × n_vars = 11808 × 27363
obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'leiden', 'leiden_res0_20', 'leiden_res0_40', 'leiden_res0_60', 'leiden_res0_80', 'leiden_res1'
var: 'gene_ids', 'feature_types', 'n_cells', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'hvg', 'log1p', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_res0_20_colors', 'leiden_res0_40_colors', 'leiden_res0_60_colors', 'leiden_res0_80_colors', 'leiden_res1_colors'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'distances', 'connectivities'
# Output .h5ad path for each processed sample, listed in the same order
# as adata_list below so the two can be paired positionally.
# NOTE(review): the healthy files are named "pbmchealth*" while the
# variables are "pbmchealthy*" - confirm that any later read uses these
# exact paths.
save_files = [
    '/home/jana/pbmcsarc1.h5ad',
    '/home/jana/pbmcsarc2.h5ad',
    '/home/jana/pbmcsarc3.h5ad',
    '/home/jana/pbmchealth1.h5ad',
    '/home/jana/pbmchealth2.h5ad',
    '/home/jana/pbmchealth3.h5ad',
    '/home/jana/pbmchealth4.h5ad',
]
adata_list = [
    pbmcsarc1,
    pbmcsarc2,
    pbmcsarc3,
    pbmchealthy1,
    pbmchealthy2,
    pbmchealthy3,
    pbmchealthy4,
]

# Write every AnnData object to its matching path on disk.
for save_file, adata in zip(save_files, adata_list):
    adata.write_h5ad(save_file)
Deleting individual datasets to save space
# Remove the per-sample variable names now that each dataset has been
# written to disk.
# NOTE(review): adata_list and the loop variable adata still reference
# these objects, so the memory is only released once those names are
# deleted or rebound as well.
del pbmcsarc1, pbmcsarc2, pbmcsarc3, pbmchealthy1, pbmchealthy2, pbmchealthy3, pbmchealthy4